Repository details.

  • Required transcript counts matrices files and objects are included in data/transcript_counts and data/objects folders respectively.
  • An annotation package for Zv9 Ensembl GTF version 77 can be found in annotation folder.

Load required libraries.

library(scater)
library(Seurat)
library(tibble)
library(dplyr)
library(GenomicFeatures)
library(org.Dr.eg.db)
library(Danio.rerio.Zv9.77.gtf)
library(ggplot2)
library(patchwork)

Analysis.

Analysis performed using Seurat (v3.1.1 and uwot_0.1.5, largely based on guidelines described here[https://satijalab.org/seurat/v3.2/pbmc3k_tutorial.html]).

Import transcript counts.

plate.wells.384 <- paste(rep(LETTERS, each=24, len=384), rep(1:24, len=384), sep="")

# Plate 2

plate.2.transcript.counts <-
  as.data.frame(
    read.delim(
      "data/transcript_counts/PN2_TranscriptCounts.tsv",
      sep = "\t",
      stringsAsFactors = F
    )
  )

rownames(plate.2.transcript.counts) <- plate.2.transcript.counts$GENEID
colnames(plate.2.transcript.counts)[-1] <- paste("Pn2_", plate.wells.384, sep = "")

# Plate 3

plate.3.transcript.counts <-
  as.data.frame(
    read.delim(
      "data/transcript_counts/PN3_TranscriptCounts.tsv",
      sep = "\t",
      stringsAsFactors = F
    )
  )

rownames(plate.3.transcript.counts) <- plate.3.transcript.counts$GENEID
colnames(plate.3.transcript.counts)[-1] <- paste("Pn3_", plate.wells.384, sep = "")

# Plate 4

plate.4.transcript.counts <-
  as.data.frame(
    read.delim(
      "data/transcript_counts/PN4_TranscriptCounts.tsv",
      sep = "\t",
      stringsAsFactors = F
    )
  )
rownames(plate.4.transcript.counts) <- plate.4.transcript.counts$GENEID
colnames(plate.4.transcript.counts)[-1] <- paste("Pn4_", plate.wells.384, sep = "")

# Plate 5

plate.5.transcript.counts <-
  as.data.frame(
    read.delim(
      "data/transcript_counts/PN5_TranscriptCounts.tsv",
      sep = "\t",
      stringsAsFactors = F
    )
  )

colnames(plate.5.transcript.counts)[-1] <-
  paste("Pn5_", plate.wells.384, sep = "")
rownames(plate.5.transcript.counts) <-
  plate.5.transcript.counts$GENEID

# Plate 7

plate.7.transcript.counts <-
  as.data.frame(
    read.delim(
      "data/transcript_counts/PN7_TranscriptCounts.tsv",
      sep = "\t",
      stringsAsFactors = F
    )
  )

colnames(plate.7.transcript.counts)[-1] <-
  paste("Pn7_", plate.wells.384, sep = "")
rownames(plate.7.transcript.counts) <-
  plate.7.transcript.counts$GENEID

# Plate 8
plate.8.transcript.counts <-
  as.data.frame(
    read.delim(
      "data/transcript_counts/PN8_TranscriptCounts.tsv",
      sep = "\t",
      stringsAsFactors = F
    )
  )

colnames(plate.8.transcript.counts)[-1] <-
  paste("Pn8_", plate.wells.384, sep = "")
rownames(plate.8.transcript.counts) <-
  plate.8.transcript.counts$GENEID

Combine plates into one count matrix.

all.plates.transcript.counts <- dplyr::full_join(plate.2.transcript.counts, plate.3.transcript.counts, by = "GENEID") %>% 
  full_join(., plate.4.transcript.counts, by = "GENEID") %>%
  full_join(., plate.5.transcript.counts, by = "GENEID") %>% 
  full_join(., plate.7.transcript.counts, by = "GENEID") %>% 
  full_join(., plate.8.transcript.counts, by = "GENEID")

# Substitute transcript counts NAs for 0s.

all.plates.transcript.counts[is.na(all.plates.transcript.counts)] <- 0

rownames(all.plates.transcript.counts) <- all.plates.transcript.counts$GENEID

## Discard spike-in ERCC controls (not used for downstream analysis).

all.plates.transcript.counts.no.ERCCs <- all.plates.transcript.counts[-grep("^ERCC", rownames(all.plates.transcript.counts)), ]

## Create counts matrix.

all.plates.transcript.counts.no.ERCCs.mx <- as.matrix(all.plates.transcript.counts.no.ERCCs[, -1])

Transcripts/genes information.

Please install package Danio.rerio.Zv9.77.gtf located in the project’s annotation folder. Code used to create this package described in annotation/TxDb.Drerio.Ensembl.gtf.git.Zv9.R.

gene.symbols <-
  mapIds(
    org.Dr.eg.db,
    keys = all.plates.transcript.counts.no.ERCCs$GENEID,
    keytype = "ENSEMBL",
    column = "SYMBOL"
  ) %>% as.data.frame() %>% rownames_to_column(var = 'gene')

colnames(gene.symbols)[2] <- "symbol"

gene.locations <-
  mapIds(
    Danio.rerio.Zv9.77.gtf,
    keys = all.plates.transcript.counts.no.ERCCs$GENEID,
    column = "CDSCHROM",
    keytype = "GENEID"
  )

mito.genes <- gene.locations == "MT" & !is.na(gene.locations)
mito.genes <- mito.genes[which(mito.genes)] %>% names()

Create a Seurat object, pre-process and QC.

scs <-
  CreateSeuratObject(
    counts = all.plates.transcript.counts.no.ERCCs.mx,
    min.cells = 25,
    min.features = 250,
    project = "Macrophages_scRNA-seq_Zebrafish"
  )

metadata.df <- scs@meta.data

metadata.df <-
  metadata.df %>% rownames_to_column(var = "well_id") %>%
  dplyr::mutate(Injury = ifelse(
    orig.ident == "Pn8",
    "Uninjured",
    ifelse(
      orig.ident == "Pn2" | orig.ident == "Pn5",
      "1 DPI",
      ifelse(orig.ident == "Pn3", "2 DPI", "3 DPI")
    )
  ))

injury.type <- metadata.df$Injury
names(injury.type) <- metadata.df$well_id


scs <-
  AddMetaData(object = scs,
              metadata = injury.type,
              col.name = "injury")

scs@meta.data$injury <-
  factor(scs@meta.data$injury,
         levels = c("Uninjured", "1 DPI", "2 DPI", "3 DPI"))

## Add percentage of mitochondrial genes reads to metadata.

scs[["percent.mt"]] <-
  PercentageFeatureSet(scs, features = mito.genes)

# Assess UMIs, features and percentage of mitochondrial reads distribution.

VlnPlot(
  object = scs,
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
  group.by = "orig.ident"
)

# Plot Pn8 separately (Pn8 outliers compress y-axis scale which makes it difficult to make a clear assessment)for better visualization

VlnPlot(
  object = subset(scs, subset = orig.ident != "Pn8"),
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
  group.by = "orig.ident"
)

VlnPlot(
  object = subset(scs, subset = orig.ident == "Pn8"),
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
  group.by = "orig.ident"
)

# Assess UMIs to features relationship. 

FeatureScatter( object = subset(scs, subset = orig.ident != "Pn8"),
               feature1 = "nCount_RNA",
               feature2 = "nFeature_RNA")

FeatureScatter( object = subset(scs, subset = orig.ident == "Pn8"),
               feature1 = "nCount_RNA",
               feature2 = "nFeature_RNA")

Filter cells.

Based on the quality assessment from above, we filtered cells using the parameters below (global thresholds, i.e, non plate-specific, applied due to quality heterogenity of plates).

scs.filter <-
  subset(
    scs,
    subset = percent.mt < 10 &
      nFeature_RNA > 500 &
      nFeature_RNA < 3500 & nCount_RNA > 1000 & nCount_RNA < 15000
  )

# UMIs to features relationship post filtering.


VlnPlot(
  object = scs.filter,
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
  group.by = "orig.ident"
)

FeatureScatter(object = scs.filter,
               feature1 = "nCount_RNA",
               feature2 = "nFeature_RNA")

Top 500 most expressed genes.

scs.sce <- as.SingleCellExperiment(scs.filter, assay = "RNA")

features.QC <- perFeatureQCMetrics(scs.sce)

rownames(features.QC) <- rownames(scs.sce)
features.QC$gene_id <- rownames(features.QC)

top.500.most.expressed <- dplyr::arrange(features.QC %>% as.data.frame(), desc(mean)) %>% head(n = 500)

colnames(top.500.most.expressed)[3] <- "gene"

top.500.most.expressed.annotated <- left_join(top.500.most.expressed, gene.symbols, by = "gene")

SCT assay.

SCTransform.

scs.filter <-
  SCTransform(scs.filter,
              vars.to.regress = "percent.mt",
              verbose = TRUE)

Dimensional Reduction

PCA

scs.filter <- RunPCA(scs.filter, verbose = FALSE, assay = 'SCT')

DimPlot(scs.filter, label = FALSE, group.by = 'injury')

ElbowPlot(scs.filter, ndims = 50)

UMAP

scs.filter <-
  RunUMAP(scs.filter,
          dims = 1:50,
          verbose = TRUE,
          assay = 'SCT')
          

DimPlot(scs.filter, group.by = "injury")

Clustering

scs.filter <-
  FindNeighbors(scs.filter,
                dims = 1:50,
                verbose = TRUE,
                assay = 'SCT')

scs.filter <-
  FindClusters(
    scs.filter,
    verbose = TRUE,
    resolution = 0.5,
    assay = 'SCT'
  )
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1309
## Number of edges: 51738
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8239
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(scs.filter, label = FALSE, group.by = "ident") #+ NoLegend()

# Cluster by injury table

cluster.frequency.table <-
  scs.filter@meta.data %>%
  dplyr::count(seurat_clusters, injury) %>%
  dplyr::group_by(seurat_clusters) %>%
  dplyr::mutate(freq = n / sum(n)) %>%
  ungroup()

knitr::kable(caption = "Table of cluster by injury counts and frequencies",
             col.names = c("Cluster", "Injury", "n", "Freq."),
  cluster.frequency.table %>% arrange(seurat_clusters)
  )
Table of cluster by injury counts and frequencies
Cluster Injury n Freq.
0 Uninjured 2 0.0053908
0 1 DPI 31 0.0835580
0 2 DPI 116 0.3126685
0 3 DPI 222 0.5983827
1 Uninjured 1 0.0028736
1 1 DPI 315 0.9051724
1 2 DPI 19 0.0545977
1 3 DPI 13 0.0373563
2 Uninjured 3 0.0135747
2 1 DPI 40 0.1809955
2 2 DPI 59 0.2669683
2 3 DPI 119 0.5384615
3 Uninjured 169 0.9285714
3 3 DPI 13 0.0714286
4 Uninjured 12 0.1739130
4 1 DPI 3 0.0434783
4 2 DPI 10 0.1449275
4 3 DPI 44 0.6376812
5 Uninjured 2 0.0408163
5 1 DPI 5 0.1020408
5 2 DPI 11 0.2244898
5 3 DPI 31 0.6326531
6 Uninjured 8 0.1666667
6 1 DPI 5 0.1041667
6 2 DPI 2 0.0416667
6 3 DPI 33 0.6875000
7 Uninjured 4 0.1904762
7 1 DPI 1 0.0476190
7 2 DPI 9 0.4285714
7 3 DPI 7 0.3333333

RNA assay

Use for cluster marker detection, feature plots and trajectory analysis.

LogNorm and Scale.

mmp9 and nampta expression levels.

mmp9.vlnplot.1 <-
  VlnPlot(scs.filter, features = "ENSDARG00000042816") + NoLegend() + xlab("Cluster") + ggtitle(NULL)
mmp9.vlnplot.2 <-
  VlnPlot(scs.filter, features = "ENSDARG00000042816", group.by = 'injury') + NoLegend() + xlab("Time point post injury") + ggtitle(NULL)

mmp9.patchwork <- mmp9.vlnplot.1 + mmp9.vlnplot.2

mmp9.patchwork + plot_annotation(title = "mmp9 expression levels (logn counts)",
                                 theme = theme(plot.title = element_text(face = "bold", size = 16)))

nampta.vlnplot.1 <-
  VlnPlot(scs.filter, features = "ENSDARG00000030598") + NoLegend() +  xlab("Cluster") + ggtitle(NULL)
nampta.vlnplot.2 <-
  VlnPlot(scs.filter, features = "ENSDARG00000030598", group.by = 'injury') + NoLegend() + xlab("Time point post injury") + ggtitle(NULL)

nampta.patchwork <- nampta.vlnplot.1 + nampta.vlnplot.2

nampta.patchwork + plot_annotation(title = "nampta expression levels (logn counts)",
                                 theme = theme(plot.title = element_text(face = "bold", size = 16)))

Clusters markers

clusters.markers <- FindAllMarkers(scs.filter, min.pct = 0.2, assay = 'RNA', test.use = "wilcox")

de.clusters.markers <- clusters.markers %>% dplyr::filter(p_val_adj < 0.05)

Selected genes feature plots.

Please note that the percentage values described in Figure 3.e plots were obtained from the cluster.markers dataframe, for cluster specific gene expression, or top.500.most.expressed dataframe, when global gene expression was described.

selected.genes <- c("lcp1", "cd63", "arg2", "mmp13a", "mmp9", "nampta")

# Examples of how to obtain percentage of cells expressing cells selected markers.

# dplyr::filter(top.500.most.expressed, gene %in% dplyr::filter(gene.symbols, symbol %in% selected.genes)$gene)
# dplyr::filter(clusters.markers, gene %in% dplyr::filter(gene.symbols, symbol %in% selected.genes)$gene, cluster == "2")

selected.genes.annotated <- dplyr::filter(gene.symbols, symbol %in% selected.genes)

selected.genes.annotated <-
  selected.genes.annotated %>% arrange(factor(symbol,
                                              levels = selected.genes))


markers.set.plot <-
  FeaturePlot(
    object = scs.filter,
    features = selected.genes.annotated$gene,
    cols = c("lightblue", "darkred"),
    pt.size = 2,
    combine = FALSE
  )

names(markers.set.plot) <- selected.genes.annotated$symbol

markers.set.plot.l <- lapply(names(markers.set.plot), function(x) markers.set.plot[[x]] + labs(title = x))

cowplot::plot_grid(plotlist = markers.set.plot.l)

Antigen processing and presentation genes.

antigen.processing.presenting.genes <- c("cd83", "cd81a", "cd40","cd74b", "cd9b", "cd164", "cd276", "cd99", "cd82b", "cd82a", "cd99l2", "cd74a")

# Examples of how to obtain percentage of cells expressing antigen processing and presentation genes.

# dplyr::filter(top.500.most.expressed, gene %in% dplyr::filter(gene.symbols, symbol %in% antigen.processing.presenting.genes)$gene)
# dplyr::filter(clusters.markers, gene %in% dplyr::filter(gene.symbols, symbol %in% antigen.processing.presenting.genes)$gene, cluster == "1")


antigen.processing.presenting.genes.annotated <-
  dplyr::filter(gene.symbols, symbol %in% antigen.processing.presenting.genes)

antigen.processing.presenting.genes.annotated <-
  antigen.processing.presenting.genes.annotated %>% arrange(factor(
    symbol,
    levels = antigen.processing.presenting.genes)
    )


antigen.processing.presenting.genes.plot <-
  FeaturePlot(
    object = scs.filter,
    features = antigen.processing.presenting.genes.annotated$gene,
    cols = c("lightblue", "darkred"),
    pt.size = 2,
    combine = FALSE
  )

names(antigen.processing.presenting.genes.plot) <- antigen.processing.presenting.genes.annotated$symbol

antigen.processing.presenting.genes.plot.l <- lapply(names(antigen.processing.presenting.genes.plot), function(x) antigen.processing.presenting.genes.plot[[x]] + labs(title = x))

cowplot::plot_grid(plotlist = antigen.processing.presenting.genes.plot.l)

Extended Figure 5.b and 5.e violin plots.

# pou2f3 == ENSDARG00000052387

VlnPlot(scs.filter, features = "ENSDARG00000052387") + NoLegend() + xlab("Cluster") + ggtitle("pou2f3")

VlnPlot(scs.filter, features = "ENSDARG00000055158") + NoLegend() + xlab("Cluster") + ggtitle("prox1a")

Save Seurat object for trajectory analysis.

See Trajectory_Analysis.ipynb

save(scs.filter, file = "data/objects/scs_filter.Rdata")

Session information

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] patchwork_1.1.0              Danio.rerio.Zv9.77.gtf_1.0.0
##  [3] org.Dr.eg.db_3.10.0          GenomicFeatures_1.38.2      
##  [5] AnnotationDbi_1.48.0         dplyr_1.0.2                 
##  [7] tibble_3.0.3                 Seurat_3.1.1                
##  [9] scater_1.14.6                ggplot2_3.3.2               
## [11] SingleCellExperiment_1.8.0   SummarizedExperiment_1.16.1 
## [13] DelayedArray_0.12.3          BiocParallel_1.20.1         
## [15] matrixStats_0.56.0           Biobase_2.46.0              
## [17] GenomicRanges_1.38.0         GenomeInfoDb_1.22.1         
## [19] IRanges_2.20.2               S4Vectors_0.24.4            
## [21] BiocGenerics_0.32.0         
## 
## loaded via a namespace (and not attached):
##   [1] BiocFileCache_1.10.2     sn_1.6-2                 plyr_1.8.6              
##   [4] igraph_1.2.5             lazyeval_0.2.2           splines_3.6.2           
##   [7] listenv_0.8.0            TH.data_1.0-10           digest_0.6.25           
##  [10] htmltools_0.5.0          viridis_0.5.1            magrittr_1.5            
##  [13] memoise_1.1.0            cluster_2.1.0            ROCR_1.0-11             
##  [16] globals_0.12.5           Biostrings_2.54.0        RcppParallel_5.0.2      
##  [19] R.utils_2.9.2            sandwich_2.5-1           askpass_1.1             
##  [22] prettyunits_1.1.1        colorspace_1.4-1         blob_1.2.1              
##  [25] rappdirs_0.3.1           ggrepel_0.8.2            xfun_0.16               
##  [28] crayon_1.3.4             RCurl_1.98-1.2           jsonlite_1.7.0          
##  [31] survival_3.1-8           zoo_1.8-8                ape_5.4-1               
##  [34] glue_1.4.1               gtable_0.3.0             zlibbioc_1.32.0         
##  [37] XVector_0.26.0           leiden_0.3.3             BiocSingular_1.2.2      
##  [40] future.apply_1.6.0       scales_1.1.1             mvtnorm_1.1-1           
##  [43] DBI_1.1.0                bibtex_0.4.2.2           Rcpp_1.0.5              
##  [46] metap_1.4                plotrix_3.7-8            progress_1.2.2          
##  [49] viridisLite_0.3.0        reticulate_1.16          bit_4.0.4               
##  [52] rsvd_1.0.3               SDMTools_1.1-221         tsne_0.1-3              
##  [55] htmlwidgets_1.5.1        httr_1.4.2               RColorBrewer_1.1-2      
##  [58] TFisher_0.2.0            ellipsis_0.3.1           ica_1.0-2               
##  [61] farver_2.0.3             XML_3.99-0.3             pkgconfig_2.0.3         
##  [64] R.methodsS3_1.8.0        dbplyr_2.0.0             uwot_0.1.5              
##  [67] labeling_0.3             tidyselect_1.1.0         rlang_0.4.7             
##  [70] reshape2_1.4.4           munsell_0.5.0            tools_3.6.2             
##  [73] generics_0.0.2           RSQLite_2.2.0            mathjaxr_1.0-1          
##  [76] ggridges_0.5.2           evaluate_0.14            stringr_1.4.0           
##  [79] yaml_2.2.1               knitr_1.29               bit64_4.0.2             
##  [82] fitdistrplus_1.1-1       purrr_0.3.4              RANN_2.6.1              
##  [85] pbapply_1.4-3            future_1.18.0            nlme_3.1-143            
##  [88] R.oo_1.23.0              biomaRt_2.42.1           compiler_3.6.2          
##  [91] curl_4.3                 beeswarm_0.2.3           plotly_4.9.2.1          
##  [94] png_0.1-7                stringi_1.4.6            highr_0.8               
##  [97] RSpectra_0.16-0          lattice_0.20-38          Matrix_1.2-18           
## [100] multtest_2.42.0          vctrs_0.3.2              mutoss_0.1-12           
## [103] pillar_1.4.6             lifecycle_0.2.0          Rdpack_1.0.0            
## [106] lmtest_0.9-37            RcppAnnoy_0.0.16         BiocNeighbors_1.4.2     
## [109] data.table_1.13.0        cowplot_1.0.0            bitops_1.0-6            
## [112] irlba_2.3.3              gbRd_0.4-11              rtracklayer_1.46.0      
## [115] R6_2.4.1                 KernSmooth_2.23-16       gridExtra_2.3           
## [118] vipor_0.4.5              codetools_0.2-16         assertthat_0.2.1        
## [121] MASS_7.3-51.5            openssl_1.4.2            withr_2.2.0             
## [124] GenomicAlignments_1.22.1 Rsamtools_2.2.3          sctransform_0.2.1       
## [127] mnormt_1.5-6             multcomp_1.4-13          GenomeInfoDbData_1.2.2  
## [130] hms_0.5.3                grid_3.6.2               tidyr_1.1.1             
## [133] rmarkdown_2.5            DelayedMatrixStats_1.8.0 Rtsne_0.15              
## [136] numDeriv_2016.8-1.1      ggbeeswarm_0.6.0